Skip to main content
Version: Next

Query

Aspects

queryProperties

Information about a Query against one or more data assets (e.g. Tables or Views).

Schema
{
"type": "record",
"Aspect": {
"name": "queryProperties"
},
"name": "QueryProperties",
"namespace": "com.linkedin.query",
"fields": [
{
"type": {
"type": "record",
"name": "QueryStatement",
"namespace": "com.linkedin.query",
"fields": [
{
"type": "string",
"name": "value",
"doc": "The query text"
},
{
"type": {
"type": "enum",
"symbolDocs": {
"SQL": "A SQL Query"
},
"name": "QueryLanguage",
"namespace": "com.linkedin.query",
"symbols": [
"SQL"
]
},
"name": "language",
"default": "SQL",
"doc": "The language of the Query, e.g. SQL."
}
],
"doc": "A query statement against one or more data assets."
},
"name": "statement",
"doc": "The Query Statement."
},
{
"Searchable": {},
"type": {
"type": "enum",
"symbolDocs": {
"MANUAL": "The query was entered manually by a user (via the UI).",
"SYSTEM": "The query was discovered by a crawler."
},
"name": "QuerySource",
"namespace": "com.linkedin.query",
"symbols": [
"MANUAL",
"SYSTEM"
]
},
"name": "source",
"doc": "The source of the Query"
},
{
"Searchable": {
"boostScore": 10.0,
"enableAutocomplete": true,
"fieldType": "WORD_GRAM"
},
"type": [
"null",
"string"
],
"name": "name",
"default": null,
"doc": "Optional display name to identify the query."
},
{
"type": [
"null",
"string"
],
"name": "description",
"default": null,
"doc": "The Query description."
},
{
"Searchable": {
"/actor": {
"fieldName": "createdBy",
"fieldType": "URN"
},
"/time": {
"fieldName": "createdAt",
"fieldType": "DATETIME"
}
},
"type": {
"type": "record",
"name": "AuditStamp",
"namespace": "com.linkedin.common",
"fields": [
{
"type": "long",
"name": "time",
"doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
},
{
"java": {
"class": "com.linkedin.common.urn.Urn"
},
"type": "string",
"name": "actor",
"doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change."
},
{
"java": {
"class": "com.linkedin.common.urn.Urn"
},
"type": [
"null",
"string"
],
"name": "impersonator",
"default": null,
"doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor."
},
{
"type": [
"null",
"string"
],
"name": "message",
"default": null,
"doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
}
],
"doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
},
"name": "created",
"doc": "Audit stamp capturing the time and actor who created the Query."
},
{
"Searchable": {
"/actor": {
"fieldName": "lastModifiedBy",
"fieldType": "URN"
},
"/time": {
"fieldName": "lastModifiedAt",
"fieldType": "DATETIME"
}
},
"type": "com.linkedin.common.AuditStamp",
"name": "lastModified",
"doc": "Audit stamp capturing the time and actor who last modified the Query."
},
{
"java": {
"class": "com.linkedin.common.urn.Urn"
},
"type": [
"null",
"string"
],
"name": "origin",
"default": null,
"doc": "The origin of the Query.\nThis is the source of the Query (e.g. a View, Stored Procedure, dbt Model, etc.) that the Query was created from."
}
],
"doc": "Information about a Query against one or more data assets (e.g. Tables or Views)."
}

querySubjects

Information about the subjects of a particular Query, i.e. the assets being queried.

Schema
{
"type": "record",
"Aspect": {
"name": "querySubjects"
},
"name": "QuerySubjects",
"namespace": "com.linkedin.query",
"fields": [
{
"type": {
"type": "array",
"items": {
"type": "record",
"name": "QuerySubject",
"namespace": "com.linkedin.query",
"fields": [
{
"Relationship": {
"entityTypes": [
"dataset",
"schemaField"
],
"name": "IsAssociatedWith"
},
"Searchable": {
"fieldName": "entities",
"fieldType": "URN"
},
"java": {
"class": "com.linkedin.common.urn.Urn"
},
"type": "string",
"name": "entity",
"doc": "An entity which is the subject of a query."
}
],
"doc": "A single subject of a particular query.\nIn the future, we may evolve this model to include richer details\nabout the Query Subject in relation to the query."
}
},
"name": "subjects",
"doc": "One or more subjects of the query.\n\nIn single-asset queries (e.g. table select), this will contain the Table reference\nand optionally schema field references.\n\nIn multi-asset queries (e.g. table joins), this may contain multiple Table references\nand optionally schema field references."
}
],
"doc": "Information about the subjects of a particular Query, i.e. the assets\nbeing queried."
}

status

The lifecycle status metadata of an entity, e.g. dataset, metric, feature, etc. By convention, this aspect is used to represent soft deletes.

Schema
{
"type": "record",
"Aspect": {
"name": "status"
},
"name": "Status",
"namespace": "com.linkedin.common",
"fields": [
{
"Searchable": {
"fieldType": "BOOLEAN"
},
"type": "boolean",
"name": "removed",
"default": false,
"doc": "Whether the entity has been removed (soft-deleted)."
}
],
"doc": "The lifecycle status metadata of an entity, e.g. dataset, metric, feature, etc.\nThis aspect is used to represent soft deletes conventionally."
}

dataPlatformInstance

The specific instance of the data platform that this entity belongs to

Schema
{
"type": "record",
"Aspect": {
"name": "dataPlatformInstance"
},
"name": "DataPlatformInstance",
"namespace": "com.linkedin.common",
"fields": [
{
"Searchable": {
"addToFilters": true,
"fieldType": "URN",
"filterNameOverride": "Platform"
},
"java": {
"class": "com.linkedin.common.urn.Urn"
},
"type": "string",
"name": "platform",
"doc": "Data Platform"
},
{
"Searchable": {
"addToFilters": true,
"fieldName": "platformInstance",
"fieldType": "URN",
"filterNameOverride": "Platform Instance"
},
"java": {
"class": "com.linkedin.common.urn.Urn"
},
"type": [
"null",
"string"
],
"name": "instance",
"default": null,
"doc": "Instance of the data platform (e.g. db instance)"
}
],
"doc": "The specific instance of the data platform that this entity belongs to"
}

subTypes

Sub Types. Use this aspect to specialize a generic Entity, e.g. making a Dataset also be a View or a LookerExplore.

Schema
{
"type": "record",
"Aspect": {
"name": "subTypes"
},
"name": "SubTypes",
"namespace": "com.linkedin.common",
"fields": [
{
"Searchable": {
"/*": {
"addToFilters": true,
"fieldType": "KEYWORD",
"filterNameOverride": "Sub Type",
"queryByDefault": true
}
},
"type": {
"type": "array",
"items": "string"
},
"name": "typeNames",
"doc": "The names of the specific types."
}
],
"doc": "Sub Types. Use this aspect to specialize a generic Entity\ne.g. Making a Dataset also be a View or also be a LookerExplore"
}

queryUsageStatistics (Timeseries)

Stats corresponding to a query's usage.

Schema
{
"type": "record",
"Aspect": {
"name": "queryUsageStatistics",
"type": "timeseries"
},
"name": "QueryUsageStatistics",
"namespace": "com.linkedin.query",
"fields": [
{
"type": "long",
"name": "timestampMillis",
"doc": "The event timestamp field as epoch at UTC in milliseconds."
},
{
"type": [
"null",
{
"type": "record",
"name": "TimeWindowSize",
"namespace": "com.linkedin.timeseries",
"fields": [
{
"type": {
"type": "enum",
"name": "CalendarInterval",
"namespace": "com.linkedin.timeseries",
"symbols": [
"SECOND",
"MINUTE",
"HOUR",
"DAY",
"WEEK",
"MONTH",
"QUARTER",
"YEAR"
]
},
"name": "unit",
"doc": "Interval unit such as minute/hour/day etc."
},
{
"type": "int",
"name": "multiple",
"default": 1,
"doc": "How many units. Defaults to 1."
}
],
"doc": "Defines the size of a time window."
}
],
"name": "eventGranularity",
"default": null,
"doc": "Granularity of the event if applicable"
},
{
"type": [
{
"type": "record",
"name": "PartitionSpec",
"namespace": "com.linkedin.timeseries",
"fields": [
{
"type": {
"type": "enum",
"name": "PartitionType",
"namespace": "com.linkedin.timeseries",
"symbols": [
"FULL_TABLE",
"QUERY",
"PARTITION"
]
},
"name": "type",
"default": "PARTITION"
},
{
"TimeseriesField": {},
"type": "string",
"name": "partition",
"doc": "String representation of the partition"
},
{
"type": [
"null",
{
"type": "record",
"name": "TimeWindow",
"namespace": "com.linkedin.timeseries",
"fields": [
{
"type": "long",
"name": "startTimeMillis",
"doc": "Start time as epoch at UTC."
},
{
"type": "com.linkedin.timeseries.TimeWindowSize",
"name": "length",
"doc": "The length of the window."
}
]
}
],
"name": "timePartition",
"default": null,
"doc": "Time window of the partition if applicable"
}
],
"doc": "Defines how the data is partitioned"
},
"null"
],
"name": "partitionSpec",
"default": {
"partition": "FULL_TABLE_SNAPSHOT",
"type": "FULL_TABLE",
"timePartition": null
},
"doc": "The optional partition specification."
},
{
"type": [
"null",
"string"
],
"name": "messageId",
"default": null,
"doc": "The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value."
},
{
"TimeseriesField": {},
"type": [
"null",
"int"
],
"name": "queryCount",
"default": null,
"doc": "Total query count in this bucket"
},
{
"TimeseriesField": {},
"type": [
"null",
"double"
],
"name": "queryCost",
"default": null,
"doc": "Query cost for this query and bucket"
},
{
"TimeseriesField": {},
"type": [
"null",
"long"
],
"name": "lastExecutedAt",
"default": null,
"doc": "Last executed timestamp"
},
{
"TimeseriesField": {},
"type": [
"null",
"int"
],
"name": "uniqueUserCount",
"default": null,
"doc": "Unique user count"
},
{
"TimeseriesFieldCollection": {
"key": "user"
},
"type": [
"null",
{
"type": "array",
"items": {
"type": "record",
"name": "DatasetUserUsageCounts",
"namespace": "com.linkedin.dataset",
"fields": [
{
"java": {
"class": "com.linkedin.common.urn.Urn"
},
"type": "string",
"name": "user",
"doc": "The unique id of the user."
},
{
"TimeseriesField": {},
"type": "int",
"name": "count",
"doc": "Number of times the dataset has been used by the user."
},
{
"TimeseriesField": {},
"type": [
"null",
"string"
],
"name": "userEmail",
"default": null,
"doc": "If user_email is set, we attempt to resolve the user's urn upon ingest"
}
],
"doc": "Records a single user's usage counts for a given resource"
}
}
],
"name": "userCounts",
"default": null,
"doc": "Users within this bucket, with frequency counts"
}
],
"doc": "Stats corresponding to a query's usage."
}

Relationships

Outgoing

These are the relationships stored in this entity's aspects

  • IsAssociatedWith

    • Dataset via querySubjects.subjects.entity
    • SchemaField via querySubjects.subjects.entity

Global Metadata Model

Global Graph