Closed
Bug 1846537
Opened 2 years ago
Closed 2 years ago
Airflow task dim_active_users_aggregates_mobile.fenix_active_users_aggregates_dim_check failed for exec_date 2023-08-01
Categories
(Data Platform and Tools :: General, defect)
Data Platform and Tools
General
Tracking
(Not tracked)
RESOLVED
DUPLICATE
of bug 1836808
People
(Reporter: linh, Assigned: kik)
Details
(Whiteboard: [airflow-triage])
Airflow task dim_active_users_aggregates_mobile.fenix_active_users_aggregates_dim_check failed for exec_date 2023-08-01
Log extract:
[2023-08-01, 09:05:11 EDT] {taskinstance.py:1776} ERROR - Task failed with exception
Traceback (most recent call last):
File "/home/airflow/.local/lib/python3.10/site-packages/airflow/providers/google/cloud/operators/kubernetes_engine.py", line 532, in execute
result = super().execute(context)
File "/home/airflow/.local/lib/python3.10/site-packages/airflow/providers/cncf/kubernetes/operators/kubernetes_pod.py", line 516, in execute
return self.execute_sync(context)
File "/home/airflow/.local/lib/python3.10/site-packages/airflow/providers/cncf/kubernetes/operators/kubernetes_pod.py", line 545, in execute_sync
self.cleanup(
File "/home/airflow/.local/lib/python3.10/site-packages/airflow/providers/cncf/kubernetes/operators/kubernetes_pod.py", line 671, in cleanup
raise AirflowException(
airflow.exceptions.AirflowException: Pod fenix-active-users-aggregates-dim-check-ed5lpt4u returned a failure:
remote_pod: {'api_version': 'v1',
'kind': 'Pod',
'metadata': {'annotations': {'cluster-autoscaler.kubernetes.io/safe-to-evict': 'true'},
'cluster_name': None,
'creation_timestamp': datetime.datetime(2023, 8, 1, 13, 4, 53, tzinfo=tzlocal()),
'deletion_grace_period_seconds': None,
'deletion_timestamp': None,
'finalizers': None,
'generate_name': None,
'generation': None,
'labels': {'airflow_kpo_in_cluster': 'False',
'airflow_version': '2.5.3',
'dag_id': 'dim_active_users_aggregates_mobile',
'kubernetes_pod_operator': 'True',
'run_id': 'scheduled__2023-07-31T0430000000-5eeddb47d',
'task_id': 'fenix_active_users_aggregates_dim_check',
'try_number': '2'},
'managed_fields': [{'api_version': 'v1',
'fields_type': 'FieldsV1',
'fields_v1': {'f:metadata': {'f:annotations': {'.': {},
'f:cluster-autoscaler.kubernetes.io/safe-to-evict': {}},
'f:labels': {'.': {},
'f:airflow_kpo_in_cluster': {},
'f:airflow_version': {},
'f:dag_id': {},
'f:kubernetes_pod_operator': {},
'f:run_id': {},
'f:task_id': {},
'f:try_number': {}}},
'f:spec': {'f:affinity': {},
'f:containers': {'k:{"name":"base"}': {'.': {},
'f:args': {},
'f:env': {'.': {},
'k:{"name":"SLACK_BOT_TOKEN"}': {'.': {},
'f:name': {},
'f:value': {}}},
'f:image': {},
'f:imagePullPolicy': {},
'f:name': {},
'f:resources': {},
'f:terminationMessagePath': {},
'f:terminationMessagePolicy': {}}},
'f:dnsPolicy': {},
'f:enableServiceLinks': {},
'f:restartPolicy': {},
'f:schedulerName': {},
'f:securityContext': {},
'f:terminationGracePeriodSeconds': {}}},
'manager': 'OpenAPI-Generator',
'operation': 'Update',
'subresource': None,
'time': datetime.datetime(2023, 8, 1, 13, 4, 53, tzinfo=tzlocal())},
{'api_version': 'v1',
'fields_type': 'FieldsV1',
'fields_v1': {'f:status': {'f:conditions': {'k:{"type":"ContainersReady"}': {'.': {},
'f:lastProbeTime': {},
'f:lastTransitionTime': {},
'f:reason': {},
'f:status': {},
'f:type': {}},
'k:{"type":"Initialized"}': {'.': {},
'f:lastProbeTime': {},
'f:lastTransitionTime': {},
'f:status': {},
'f:type': {}},
'k:{"type":"Ready"}': {'.': {},
'f:lastProbeTime': {},
'f:lastTransitionTime': {},
'f:reason': {},
'f:status': {},
'f:type': {}}},
'f:containerStatuses': {},
'f:hostIP': {},
'f:phase': {},
'f:podIP': {},
'f:podIPs': {'.': {},
'k:{"ip":"10.4.4.199"}': {'.': {},
'f:ip': {}}},
'f:startTime': {}}},
'manager': 'kubelet',
'operation': 'Update',
'subresource': 'status',
'time': datetime.datetime(2023, 8, 1, 13, 5, 11, tzinfo=tzlocal())}],
'name': 'fenix-active-users-aggregates-dim-check-ed5lpt4u',
'namespace': 'default',
'owner_references': None,
'resource_version': '520442586',
'self_link': None,
'uid': '3fa66c36-e601-4964-838f-f9648d8bb6d1'},
'spec': {'active_deadline_seconds': None,
'affinity': {'node_affinity': None,
'pod_affinity': None,
'pod_anti_affinity': None},
'automount_service_account_token': None,
'containers': [{'args': ['dim',
'run',
'--fail_process_on_failure',
'--project_id=mozdata',
'--dataset=fenix',
'--table=active_users_aggregates',
'--date=2023-07-30'],
'command': None,
'env': [{'name': 'SLACK_BOT_TOKEN',
'value': '***',
'value_from': None}],
'env_from': None,
'image': 'gcr.io/moz-fx-data-airflow-prod-88e0/dim:latest',
'image_pull_policy': 'Always',
'lifecycle': None,
'liveness_probe': None,
'name': 'base',
'ports': None,
'readiness_probe': None,
'resources': {'limits': None, 'requests': None},
'security_context': None,
'startup_probe': None,
'stdin': None,
'stdin_once': None,
'termination_message_path': '/dev/termination-log',
'termination_message_policy': 'File',
'tty': None,
'volume_devices': None,
'volume_mounts': [{'mount_path': '/var/run/secrets/kubernetes.io/serviceaccount',
'mount_propagation': None,
'name': 'kube-api-access-4txbj',
'read_only': True,
'sub_path': None,
'sub_path_expr': None}],
'working_dir': None}],
'dns_config': None,
'dns_policy': 'ClusterFirst',
'enable_service_links': True,
'ephemeral_containers': None,
'host_aliases': None,
'host_ipc': None,
'host_network': None,
'host_pid': None,
'hostname': None,
'image_pull_secrets': None,
'init_containers': None,
'node_name': 'gke-workloads-prod-v1-highmem-pool-v1-6f51d78b-bd27',
'node_selector': None,
'os': None,
'overhead': None,
'preemption_policy': 'PreemptLowerPriority',
'priority': 0,
'priority_class_name': None,
'readiness_gates': None,
'restart_policy': 'Never',
'runtime_class_name': None,
'scheduler_name': 'default-scheduler',
'security_context': {'fs_group': None,
'fs_group_change_policy': None,
'run_as_group': None,
'run_as_non_root': None,
'run_as_user': None,
'se_linux_options': None,
'seccomp_profile': None,
'supplemental_groups': None,
'sysctls': None,
'windows_options': None},
'service_account': 'default',
'service_account_name': 'default',
'set_hostname_as_fqdn': None,
'share_process_namespace': None,
'subdomain': None,
'termination_grace_period_seconds': 30,
'tolerations': [{'effect': 'NoExecute',
'key': 'node.kubernetes.io/not-ready',
'operator': 'Exists',
'toleration_seconds': 300,
'value': None},
{'effect': 'NoExecute',
'key': 'node.kubernetes.io/unreachable',
'operator': 'Exists',
'toleration_seconds': 300,
'value': None}],
'topology_spread_constraints': None,
'volumes': [{'aws_elastic_block_store': None,
'azure_disk': None,
'azure_file': None,
'cephfs': None,
'cinder': None,
'config_map': None,
'csi': None,
'downward_api': None,
'empty_dir': None,
'ephemeral': None,
'fc': None,
'flex_volume': None,
'flocker': None,
'gce_persistent_disk': None,
'git_repo': None,
'glusterfs': None,
'host_path': None,
'iscsi': None,
'name': 'kube-api-access-4txbj',
'nfs': None,
'persistent_volume_claim': None,
'photon_persistent_disk': None,
'portworx_volume': None,
'projected': {'default_mode': 420,
'sources': [{'config_map': None,
'downward_api': None,
'secret': None,
'service_account_token': {'audience': None,
'expiration_seconds': 3607,
'path': 'token'}},
{'config_map': {'items': [{'key': 'ca.crt',
'mode': None,
'path': 'ca.crt'}],
'name': 'kube-root-ca.crt',
'optional': None},
'downward_api': None,
'secret': None,
'service_account_token': None},
{'config_map': None,
'downward_api': {'items': [{'field_ref': {'api_version': 'v1',
'field_path': 'metadata.namespace'},
'mode': None,
'path': 'namespace',
'resource_field_ref': None}]},
'secret': None,
'service_account_token': None}]},
'quobyte': None,
'rbd': None,
'scale_io': None,
'secret': None,
'storageos': None,
'vsphere_volume': None}]},
'status': {'conditions': [{'last_probe_time': None,
'last_transition_time': datetime.datetime(2023, 8, 1, 13, 4, 54, tzinfo=tzlocal()),
'message': None,
'reason': None,
'status': 'True',
'type': 'Initialized'},
{'last_probe_time': None,
'last_transition_time': datetime.datetime(2023, 8, 1, 13, 5, 9, tzinfo=tzlocal()),
'message': None,
'reason': 'PodFailed',
'status': 'False',
'type': 'Ready'},
{'last_probe_time': None,
'last_transition_time': datetime.datetime(2023, 8, 1, 13, 5, 9, tzinfo=tzlocal()),
'message': None,
'reason': 'PodFailed',
'status': 'False',
'type': 'ContainersReady'},
{'last_probe_time': None,
'last_transition_time': datetime.datetime(2023, 8, 1, 13, 4, 53, tzinfo=tzlocal()),
'message': None,
'reason': None,
'status': 'True',
'type': 'PodScheduled'}],
'container_statuses': [{'container_id': 'containerd://9cfb771a32e1c323bfe59bb726ae872842d0ae4ccf8629d2f596e2b891f0ad9b',
'image': 'gcr.io/moz-fx-data-airflow-prod-88e0/dim:latest',
'image_id': 'gcr.io/moz-fx-data-airflow-prod-88e0/dim@sha256:0024b734ced12b5a9a1abe99ad035fcde26a0e0a7a3b11c39ba40b886ad8048a',
'last_state': {'running': None,
'terminated': None,
'waiting': None},
'name': 'base',
'ready': False,
'restart_count': 0,
'started': False,
'state': {'running': None,
'terminated': {'container_id': 'containerd://9cfb771a32e1c323bfe59bb726ae872842d0ae4ccf8629d2f596e2b891f0ad9b',
'exit_code': 1,
'finished_at': datetime.datetime(2023, 8, 1, 13, 5, 8, tzinfo=tzlocal()),
'message': None,
'reason': 'Error',
'signal': None,
'started_at': datetime.datetime(2023, 8, 1, 13, 4, 55, tzinfo=tzlocal())},
'waiting': None}}],
'ephemeral_container_statuses': None,
'host_ip': '10.8.0.47',
'init_container_statuses': None,
'message': None,
'nominated_node_name': None,
'phase': 'Failed',
'pod_i_ps': [{'ip': '10.4.4.199'}],
'pod_ip': '10.4.4.199',
'qos_class': 'BestEffort',
'reason': None,
'start_time': datetime.datetime(2023, 8, 1, 13, 4, 54, tzinfo=tzlocal())}}
Comment 1•2 years ago
|
||
This failed again on 2023-08-06
Comment 2•2 years ago
|
||
Investigated on https://bugzilla.mozilla.org/show_bug.cgi?id=1836808.
You need to log in
before you can comment on or make changes to this bug.
Description
•