9609292c286c195f2fe3dc2158b7cbcf6c0a224b
[samza.git] / samza-api / src / main / java / org / apache / samza / operators / windows / Window.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.samza.operators.windows;
20
21 import org.apache.samza.annotation.InterfaceStability;
22 import org.apache.samza.operators.triggers.Trigger;
23
24 /**
25 * Groups incoming messages in the {@link org.apache.samza.operators.MessageStream} into finite
26 * windows for processing.
27 *
28 * <p> A window is uniquely identified by its {@link WindowKey}. A window can have one or more associated {@link Trigger}s
29 * that determine when results from the {@link Window} are emitted.
30 *
31 * <p> Each emitted result contains one or more messages in the window and is called a {@link WindowPane}.
32 * A pane can include all messages collected for the window so far, or only the new messages
33 * since the last emitted pane, as determined by the {@link AccumulationMode}.
34 *
35 * <p> A window can have early triggers that allow emitting {@link WindowPane}s speculatively before all data for the window
36 * has arrived, or late triggers that allow handling of late data arrivals.
37 *
38 * <p> A {@link Window} is defined as "keyed" when the incoming {@link org.apache.samza.operators.MessageStream} is first
39 * partitioned based on the provided key, and windowing is applied on the partitioned stream.
40 *
41 * window wk1 (with its triggers)
42 * +--------------------------------+
43 * ------------+--------+-----------+
44 * | | | |
45 * | pane 1 |pane2 | pane3 |
46 * +-----------+--------+-----------+
47 *
48 * -----------------------------------
49 * incoming message stream ------+
50 * -----------------------------------
51 * window wk2
52 * +---------------------+---------+
53 * | pane 1| pane 2 | pane 3 |
54 * | | | |
55 * +---------+-----------+---------+
56 *
57 * window wk3
58 * +----------+-----------+---------+
59 * | | | |
60 * | pane 1 | pane 2 | pane 3|
61 * | | | |
62 * +----------+-----------+---------+
63 *
64 *
65 * <p> Use the {@link Windows} APIs to create various windows and the {@link org.apache.samza.operators.triggers.Triggers}
66 * APIs to create triggers.
67 *
68 * @param <M> the type of the input message
69 * @param <K> the type of the key in the message in this {@link org.apache.samza.operators.MessageStream}.
70 * @param <WV> the type of the value in the {@link WindowPane}.
71 */
72 @InterfaceStability.Unstable
73 public interface Window<M, K, WV> {
74
75 /**
76 * Sets the early trigger for this {@link Window}, allowing {@link WindowPane}s to be emitted speculatively before all data for the window has arrived.
77 * <p> Use the {@link org.apache.samza.operators.triggers.Triggers} APIs to create instances of {@link Trigger}.
78 *
79 * @param trigger the early trigger
80 * @return the {@link Window} function with the early trigger
81 */
82 Window<M, K, WV> setEarlyTrigger(Trigger<M> trigger);
83
84 /**
85 * Sets the late trigger for this {@link Window}, allowing late data arrivals to be handled.
86 * <p> Use the {@link org.apache.samza.operators.triggers.Triggers} APIs to create instances of {@link Trigger}.
87 *
88 * @param trigger the late trigger
89 * @return the {@link Window} function with the late trigger
90 */
91 Window<M, K, WV> setLateTrigger(Trigger<M> trigger);
92
93 /**
94 * Specifies how this {@link Window} treats messages that were already included in its previously emitted {@link WindowPane}s.
95 *
96 * <p> There are two types of {@link AccumulationMode}s:
97 * <ul>
98 * <li> {@code ACCUMULATING}: window panes include all messages collected for the window (key) so far, even if they were
99 * included in previously emitted window panes. </li>
100 * <li> {@code DISCARDING}: window panes include only the messages collected for this window (key) since the last emitted
101 * window pane. </li>
102 * </ul>
103 *
104 * @param mode the accumulation mode
105 * @return the {@link Window} function with the specified {@link AccumulationMode}
106 */
107 Window<M, K, WV> setAccumulationMode(AccumulationMode mode);
108
109 }