Add GCS input to Cloudflare Logpush (#4720)
* initial commit to add gcs input to cloudflare_logpush

* updated changelog

* updated manifest with version

* updated docs

* updated docs

* updated docs

* updated docs as per PR suggestions

* updated docs

* updated PR with config changes & suggestions
ShourieG authored Dec 1, 2022
1 parent 028ad08 commit 941348f
Showing 18 changed files with 866 additions and 3 deletions.
12 changes: 11 additions & 1 deletion packages/cloudflare_logpush/_dev/build/docs/README.md
@@ -65,6 +65,16 @@ This module has been tested against **Cloudflare version v4**.
- Credentials for the above AWS S3 and SQS input types should be configured using the [link](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-aws-s3.html#aws-credentials-config).
- Data collection via AWS S3 Bucket and AWS SQS are mutually exclusive in this case.

### To collect data from GCS buckets, follow the steps below:
- Configure the [Data Forwarder](https://developers.cloudflare.com/logs/get-started/enable-destinations/google-cloud-storage/) to ingest data into a GCS bucket.
- Configure the GCS bucket names and credentials, along with the required configuration, under the "Collect Cloudflare Logpush logs via Google Cloud Storage" section. A sketch of the bucket configuration follows this list.
- Make sure the service account and authentication method being used have the proper level of access to the GCS bucket. See [Manage Service Account Keys](https://cloud.google.com/iam/docs/creating-managing-service-account-keys/) for details.

**Note**:
- The GCS input currently does not support discovering buckets by prefix, so the bucket names have to be configured manually for each data stream.
- The GCS input currently only accepts a service account JSON key or a service account JSON file for authentication.
- The GCS input currently only supports JSON data.
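For reference, here is a minimal sketch of what the per-data-stream bucket configuration mentioned above might look like; the bucket names are placeholders for whatever buckets your Logpush jobs write to, and the per-bucket overrides are optional:

```yaml
# Illustrative only: replace the bucket names with your own.
- name: cloudflare-audit-logs
- name: cloudflare-audit-logs-eu
  # Optional per-bucket overrides; values set here take precedence over the global settings.
  poll_interval: 30s
  bucket_timeout: 60s
```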

### To collect data from the Cloudflare HTTP Endpoint, follow the steps below:
- See [Enable HTTP destination](https://developers.cloudflare.com/logs/get-started/enable-destinations/http/) for Cloudflare Logpush.
- Add the same custom header along with its value on both sides for additional security.
@@ -88,7 +98,7 @@ curl --location --request POST 'https://api.cloudflare.com/client/v4/zones/<ZONE
2. In the integrations search bar type **Cloudflare Logpush**.
3. Click the **Cloudflare Logpush** integration from the search results.
4. Click the **Add Cloudflare Logpush** button to add Cloudflare Logpush integration.
5. Enable the Integration with the HTTP Endpoint or AWS S3 input.
5. Enable the Integration with the HTTP Endpoint, AWS S3 input or GCS input.
6. Under the AWS S3 input, there are two types of inputs: using AWS S3 Bucket or using SQS.
7. Configure Cloudflare to send logs to the Elastic Agent.

5 changes: 5 additions & 0 deletions packages/cloudflare_logpush/changelog.yml
@@ -1,4 +1,9 @@
# newer versions go on top
- version: "0.4.0"
  changes:
    - description: Added support for GCS input.
      type: enhancement
      link: https://github.com/elastic/integrations/pull/4720
- version: "0.3.0"
  changes:
    - description: Update package to ECS 8.5.0.
@@ -0,0 +1,38 @@
{{#if project_id}}
project_id: {{project_id}}
{{/if}}
{{#if service_account_key}}
auth.credentials_json.account_key: {{service_account_key}}
{{/if}}
{{#if service_account_file}}
auth.credentials_file.path: {{service_account_file}}
{{/if}}
{{#if number_of_workers}}
max_workers: {{number_of_workers}}
{{/if}}
{{#if polling}}
poll: {{polling}}
{{/if}}
{{#if poll_interval}}
poll_interval: {{poll_interval}}
{{/if}}
{{#if bucket_timeout}}
bucket_timeout: {{bucket_timeout}}
{{/if}}
{{#if buckets}}
buckets:
{{buckets}}
{{/if}}
{{#if tags}}
tags:
{{#each tags as |tag|}}
- {{tag}}
{{/each}}
{{/if}}
{{#contains "forwarded" tags}}
publisher_pipeline.disable_host: true
{{/contains}}
{{#if processors}}
processors:
{{processors}}
{{/if}}
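As a rough illustration, and assuming invented values for the policy variables, the template above would render to agent configuration along these lines:

```yaml
# Hypothetical rendering with placeholder values; actual output depends on the integration settings.
project_id: my-gcp-project
auth.credentials_file.path: /etc/gcs/service-account.json
max_workers: 3
poll: true
poll_interval: 15s
bucket_timeout: 120s
buckets:
- name: audit_logs
- name: audit_logs_2
tags:
- forwarded
- cloudflare_logpush_audit
publisher_pipeline.disable_host: true
```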
78 changes: 78 additions & 0 deletions packages/cloudflare_logpush/data_stream/audit/manifest.yml
@@ -149,3 +149,81 @@ streams:
        show_user: false
        description: >-
          Processors are used to reduce the number of fields in the exported event or to enhance the event with metadata. This executes in the agent before the logs are parsed. See [Processors](https://www.elastic.co/guide/en/beats/filebeat/current/filtering-and-enhancing-data.html) for details.
  - input: gcs
    title: Audit logs via GCS
    description: Collect Audit logs from Cloudflare.
    template_path: gcs.yml.hbs
    vars:
      - name: max_workers
        type: integer
        title: 'Maximum number of workers'
        multi: false
        required: false
        show_user: true
        default: 3
        description: Determines how many workers are spawned per bucket.
      - name: poll
        type: bool
        title: 'Polling'
        multi: false
        required: false
        show_user: true
        default: true
        description: Determines if the bucket will be continuously polled for new documents.
      - name: poll_interval
        type: text
        title: 'Polling interval'
        multi: false
        required: false
        show_user: true
        default: 15s
        description: Determines the time interval between polling operations.
      - name: bucket_timeout
        type: text
        title: 'Bucket Timeout'
        multi: false
        required: false
        show_user: true
        default: 120s
        description: Defines the maximum time that the SDK will wait for a bucket API response before timing out.
      - name: buckets
        type: yaml
        title: Buckets
        description: "This attribute contains the details of a specific bucket, such as name, max_workers, poll,
          poll_interval and bucket_timeout. The attribute 'name' is specific to a bucket as it describes the bucket name,
          while the fields max_workers, poll, poll_interval and bucket_timeout can exist both at the bucket level and at the global level.
          If you have already defined the attributes globally, you only need to specify the bucket name in this YAML config. If you want to override any specific
          attribute for a specific bucket, you can define it here. Any attribute defined in the YAML will override the global definitions.
          Please see the relevant [Documentation](https://www.elastic.co/guide/en/beats/filebeat/8.5/filebeat-input-gcs.html#attrib-buckets) for further information.\n"
        required: true
        show_user: true
        default: |
          # You can define as many buckets as you want here.
          - name: audit_logs
          - name: audit_logs_2
          # The config below is an example of how to override the global config.
          #- name: audit_logs_3
          #  max_workers: 3
          #  poll: true
          #  poll_interval: 10s
          #  bucket_timeout: 30s
      - name: processors
        type: yaml
        title: Processors
        multi: false
        required: false
        show_user: false
        description: |
          Processors are used to reduce the number of fields in the exported event or to enhance the event with metadata. This executes in the agent before the logs are parsed. See [Processors](https://www.elastic.co/guide/en/beats/filebeat/current/filtering-and-enhancing-data.html) for details.
      - name: tags
        type: text
        title: Tags
        multi: true
        required: true
        show_user: false
        default:
          - forwarded
          - cloudflare_logpush_audit
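If the optional `processors` variable above is populated, a small sketch of what it might contain could look like the following, using the standard Beats `add_fields` processor; the added field name is purely illustrative:

```yaml
# Example only: tag GCS-sourced events with an extra label before they are parsed.
- add_fields:
    target: labels
    fields:
      source_input: gcs
```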
@@ -0,0 +1,38 @@
{{#if project_id}}
project_id: {{project_id}}
{{/if}}
{{#if service_account_key}}
auth.credentials_json.account_key: {{service_account_key}}
{{/if}}
{{#if service_account_file}}
auth.credentials_file.path: {{service_account_file}}
{{/if}}
{{#if number_of_workers}}
max_workers: {{number_of_workers}}
{{/if}}
{{#if polling}}
poll: {{polling}}
{{/if}}
{{#if poll_interval}}
poll_interval: {{poll_interval}}
{{/if}}
{{#if bucket_timeout}}
bucket_timeout: {{bucket_timeout}}
{{/if}}
{{#if buckets}}
buckets:
{{buckets}}
{{/if}}
{{#if tags}}
tags:
{{#each tags as |tag|}}
- {{tag}}
{{/each}}
{{/if}}
{{#contains "forwarded" tags}}
publisher_pipeline.disable_host: true
{{/contains}}
{{#if processors}}
processors:
{{processors}}
{{/if}}
78 changes: 78 additions & 0 deletions packages/cloudflare_logpush/data_stream/dns/manifest.yml
@@ -149,3 +149,81 @@ streams:
        show_user: false
        description: >-
          Processors are used to reduce the number of fields in the exported event or to enhance the event with metadata. This executes in the agent before the logs are parsed. See [Processors](https://www.elastic.co/guide/en/beats/filebeat/current/filtering-and-enhancing-data.html) for details.
  - input: gcs
    title: DNS logs via GCS
    description: Collect DNS logs from Cloudflare.
    template_path: gcs.yml.hbs
    vars:
      - name: max_workers
        type: integer
        title: 'Maximum number of workers'
        multi: false
        required: false
        show_user: true
        default: 3
        description: Determines how many workers are spawned per bucket.
      - name: poll
        type: bool
        title: 'Polling'
        multi: false
        required: false
        show_user: true
        default: true
        description: Determines if the bucket will be continuously polled for new documents.
      - name: poll_interval
        type: text
        title: 'Polling interval'
        multi: false
        required: false
        show_user: true
        default: 15s
        description: Determines the time interval between polling operations.
      - name: bucket_timeout
        type: text
        title: 'Bucket Timeout'
        multi: false
        required: false
        show_user: true
        default: 120s
        description: Defines the maximum time that the SDK will wait for a bucket API response before timing out.
      - name: buckets
        type: yaml
        title: Buckets
        description: "This attribute contains the details of a specific bucket, such as name, max_workers, poll,
          poll_interval and bucket_timeout. The attribute 'name' is specific to a bucket as it describes the bucket name,
          while the fields max_workers, poll, poll_interval and bucket_timeout can exist both at the bucket level and at the global level.
          If you have already defined the attributes globally, you only need to specify the bucket name in this YAML config. If you want to override any specific
          attribute for a specific bucket, you can define it here. Any attribute defined in the YAML will override the global definitions.
          Please see the relevant [Documentation](https://www.elastic.co/guide/en/beats/filebeat/8.5/filebeat-input-gcs.html#attrib-buckets) for further information.\n"
        required: true
        show_user: true
        default: |
          # You can define as many buckets as you want here.
          - name: dns_logs
          - name: dns_logs_2
          # The config below is an example of how to override the global config.
          #- name: dns_logs_3
          #  max_workers: 3
          #  poll: true
          #  poll_interval: 10s
          #  bucket_timeout: 30s
      - name: processors
        type: yaml
        title: Processors
        multi: false
        required: false
        show_user: false
        description: |
          Processors are used to reduce the number of fields in the exported event or to enhance the event with metadata. This executes in the agent before the logs are parsed. See [Processors](https://www.elastic.co/guide/en/beats/filebeat/current/filtering-and-enhancing-data.html) for details.
      - name: tags
        type: text
        title: Tags
        multi: true
        required: true
        show_user: false
        default:
          - forwarded
          - cloudflare_logpush_dns
@@ -0,0 +1,38 @@
{{#if project_id}}
project_id: {{project_id}}
{{/if}}
{{#if service_account_key}}
auth.credentials_json.account_key: {{service_account_key}}
{{/if}}
{{#if service_account_file}}
auth.credentials_file.path: {{service_account_file}}
{{/if}}
{{#if number_of_workers}}
max_workers: {{number_of_workers}}
{{/if}}
{{#if polling}}
poll: {{polling}}
{{/if}}
{{#if poll_interval}}
poll_interval: {{poll_interval}}
{{/if}}
{{#if bucket_timeout}}
bucket_timeout: {{bucket_timeout}}
{{/if}}
{{#if buckets}}
buckets:
{{buckets}}
{{/if}}
{{#if tags}}
tags:
{{#each tags as |tag|}}
- {{tag}}
{{/each}}
{{/if}}
{{#contains "forwarded" tags}}
publisher_pipeline.disable_host: true
{{/contains}}
{{#if processors}}
processors:
{{processors}}
{{/if}}